rm(list=ls())
library(tidyverse)
options(max.print=1000)
library(maps)
library(mapdata)
library(lubridate)
library(viridis)
library(wesanderson)
library(RColorBrewer)
library(plotly)
# World map of confirmed COVID-19 cases.
# Reads the 04-02-2020 daily report from the CSSEGISandData COVID-19
# repository and renames the `Long_` column to `Long`.
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>%
  rename(Long = "Long_")

# One bubble per report row, sized by confirmed cases in thousands, drawn on
# top of a grey world base map.
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed / 1000)) +
  borders("world", colour = NA, fill = "grey90") +
  theme_bw() +
  geom_point(shape = 21, color = "purple", fill = "purple", alpha = 0.5) +
  labs(
    title = "World COVID-19 Confirmed cases", x = "", y = "",
    # Legend title: the original text had an unbalanced closing parenthesis.
    size = "Cases (x1000)"
  ) +
  theme(legend.position = "right") +
  coord_fixed(ratio = 1.5)
# Zoom in on US states and territories (using a different date than above).
# US map of confirmed COVID-19 cases from the 04-05-2020 daily report.
# Keeps only US rows, drops the listed states, territories and non-geographic
# entries, and keeps rows with a positive latitude.
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-05-2020.csv")) %>%
  rename(Long = "Long_") %>%
  filter(Country_Region == "US") %>%
  filter(!Province_State %in% c(
    "Alaska", "Hawaii", "American Samoa",
    "Puerto Rico", "Northern Mariana Islands",
    "Virgin Islands", "Recovered", "Guam", "Grand Princess",
    "District of Columbia", "Diamond Princess"
  )) %>%
  filter(Lat > 0)

# One bubble per remaining report row, sized by confirmed cases in thousands,
# drawn over the state outlines.
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed / 1000)) +
  borders("state", colour = "black", fill = "grey90") +
  theme_bw() +
  geom_point(shape = 21, color = "purple", fill = "purple", alpha = 0.5) +
  labs(
    title = "COVID-19 Confirmed Cases in the US", x = "", y = "",
    # Legend title: the original text had an unbalanced closing parenthesis.
    size = "Cases (x1000)"
  ) +
  theme(legend.position = "right") +
  coord_fixed(ratio = 1.5)
# Prettier version based on an example by Anisa Dhana. borders() is used to define the areas in the map.
# Bubble map with log-scaled size and colour, plotted from whatever
# `daily_report` holds when this statement runs.
#
# Legend breaks shared by the size and colour scales. The original vector
# repeated 10000, so the "50,000+" label was attached to a duplicate break;
# the last break is now 50000 and the fourth label is narrowed to match.
mybreaks <- c(1, 100, 1000, 10000, 50000)

ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed)) +
  borders("state", colour = "white", fill = "grey90") +
  geom_point(
    aes(x = Long, y = Lat, size = Confirmed, color = Confirmed),
    stroke = FALSE, alpha = 0.7
  ) +
  scale_size_continuous(
    name = "Cases", trans = "log", range = c(1, 7),
    breaks = mybreaks,
    labels = c("1-99", "100-999", "1,000-9,999", "10,000-49,999", "50,000+")
  ) +
  scale_color_viridis_c(
    option = "viridis", name = "Cases",
    trans = "log", breaks = mybreaks,
    labels = c("1-99", "100-999", "1,000-9,999", "10,000-49,999", "50,000+")
  ) +
  # Cleaning up the graph
  theme_void() +
  # Same name, breaks and labels on both scales, with the colour guide drawn
  # as a legend rather than a colourbar.
  guides(colour = guide_legend()) +
  labs(title = "Anisa Dhana's layout for COVID-19 Confirmed Cases in the US") +
  theme(
    legend.position = "bottom",
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  coord_fixed(ratio = 1.5)
# Mapping data to shapes
# State outline polygons; `region` holds lower-case state names.
us <- map_data("state")

# Total confirmed cases per US state from the 04-02-2020 daily report.
# State names are lower-cased so they can be matched against `us$region`.
daily_report <-
  read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>%
  rename(Long = "Long_") %>%
  filter(Country_Region == "US") %>%
  group_by(Province_State) %>%
  summarise(Confirmed = sum(Confirmed)) %>%
  mutate(Province_State = tolower(Province_State))

# Attach the per-state totals to every polygon vertex so the map and the
# counts live in a single data frame.
state_join <- us %>%
  left_join(daily_report, by = c(region = "Province_State"))
# Using R palettes
# Plot the state map: one polygon per state, filled by confirmed cases on a
# log10 colour scale using the continuous "Zissou1" Wes Anderson palette.
ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  # Add data layer
  geom_polygon(data = state_join, aes(fill = Confirmed), color = "black") +
  scale_fill_gradientn(
    colours = wes_palette("Zissou1", 100, type = "continuous"),
    trans = "log10"
  ) +
  # Title: removed the stray trailing apostrophe present in the original.
  labs(title = "COVID-19 Confirmed Cases in the US")
# Colorblind-friendly palettes
# To display only colorblind-friendly brewer palettes, specify the option
# colorblindFriendly = TRUE as follows:
# display.brewer.all(colorblindFriendly = TRUE)

# Get and format the covid report data: one confirmed total per
# "county.state" key, lower-cased to match the map data.
# NOTE: despite its name, `report_03_27_2020` is read from the 04-02-2020
# report. The name is kept unchanged in case other code refers to it.
report_03_27_2020 <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>%
  rename(Long = "Long_") %>%
  unite(Key, Admin2, Province_State, sep = ".") %>%
  group_by(Key) %>%
  summarize(Confirmed = sum(Confirmed)) %>%
  mutate(Key = tolower(Key))
# dim(report_03_27_2020)

# Get and format the map data; county polygons get the same
# "subregion.region" key (original columns are kept).
us <- map_data("state")
counties <- map_data("county") %>%
  unite(Key, subregion, region, sep = ".", remove = FALSE)

# Join the 2 tibbles
state_join <- left_join(counties, report_03_27_2020, by = c("Key"))
# sum(is.na(state_join$Confirmed))

ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  # Add data layer
  geom_polygon(data = state_join, aes(fill = Confirmed)) +
  # State outlines are added after the county fill. Layers are drawn in the
  # order they are added, so drawing the outlines first let the filled county
  # polygons paint over them.
  borders("state", colour = "black") +
  scale_fill_gradientn(
    colors = brewer.pal(n = 5, name = "PuRd"),
    breaks = c(1, 10, 100, 1000, 10000, 100000),
    # Counties without a matching report row are shown in white.
    trans = "log10", na.value = "White"
  ) +
  ggtitle("Number of Confirmed Cases by US County") +
  theme_bw()
# Let's look at just Massachusetts (MA).
# Confirmed cases per Massachusetts county from the 04-02-2020 daily report,
# with county names lower-cased to match the map data's `subregion` column.
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>%
  rename(Long = "Long_") %>%
  filter(Province_State == "Massachusetts") %>%
  group_by(Admin2) %>%
  summarize(Confirmed = sum(Confirmed)) %>%
  mutate(Admin2 = tolower(Admin2))

# State and county polygons restricted to Massachusetts.
us <- map_data("state")
ma_us <- subset(us, region == "massachusetts")
counties <- map_data("county")
ma_county <- subset(counties, region == "massachusetts")

# Attach the county totals to the county polygons.
state_join <- left_join(ma_county, daily_report, by = c("subregion" = "Admin2"))

# plot state map
ggplot(data = ma_county, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  # Add data layer
  geom_polygon(data = state_join, aes(fill = Confirmed), color = "white") +
  scale_fill_gradientn(
    colors = brewer.pal(n = 5, name = "BuGn"),
    trans = "log10"
  ) +
  # Title: removed the stray trailing apostrophe present in the original.
  labs(title = "COVID-19 Confirmed Cases in Massachusetts")
# Note: the cases in Nantucket and Dukes counties were reported as one value and are not included on the graph. There is also an unassigned category that includes 303 confirmed cases as of 3/31/2020.
# Show the per-county summary table. The `##` lines below appear to be the
# printed output captured when the document was rendered (Admin2 = lower-cased
# county name, Confirmed = summed confirmed cases).
daily_report
## # A tibble: 14 x 2
## Admin2 Confirmed
## <chr> <dbl>
## 1 barnstable 283
## 2 berkshire 213
## 3 bristol 424
## 4 dukes and nantucket 12
## 5 essex 1039
## 6 franklin 85
## 7 hampden 546
## 8 hampshire 102
## 9 middlesex 1870
## 10 norfolk 938
## 11 plymouth 621
## 12 suffolk 1896
## 13 unassigned 270
## 14 worcester 667
# We will use plotly to make interactive graphs with the maps.
# Interactive version of the Massachusetts county map: build the ggplot
# object, then hand it to plotly for conversion.
(
  ggplot(ma_county, aes(x = long, y = lat, group = group)) +
    # County polygons filled by confirmed cases, outlined in black.
    geom_polygon(
      data = state_join,
      mapping = aes(fill = Confirmed),
      colour = "black"
    ) +
    coord_fixed(ratio = 1.3) +
    scale_fill_gradientn(
      colours = wes_palette("Zissou1", 100, type = "continuous")
    ) +
    ggtitle("COVID-19 Cases in MA") +
    labs(x = NULL, y = NULL) +
    # Strip the panel border/background and the axis ticks/text in one call.
    theme(
      panel.border = element_blank(),
      panel.background = element_blank(),
      axis.ticks = element_blank(),
      axis.text = element_blank()
    )
) %>%
  ggplotly()
# For the above graph "World COVID-19 Confirmed cases", summarize the counts for each country on the graph and update the graph to 9/26/2020. You may need to adjust the size of the points.
# World map of confirmed cases per country from the 09-26-2020 daily report.
#
# The original grouped by Country_Region, Long and Lat. Because each report
# row carries its own coordinates, that grouping did not collapse anything.
# Group by country only, sum the confirmed counts, and place each country's
# bubble at the mean of its rows' coordinates (an approximate position, not a
# true geographic centroid).
daily_report_worldwide <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv")) %>%
  rename(Long = "Long_") %>%
  group_by(Country_Region) %>%
  summarize(
    Confirmed = sum(Confirmed, na.rm = TRUE),
    # na.rm: rows without coordinates should not blank out a whole country.
    Long = mean(Long, na.rm = TRUE),
    Lat = mean(Lat, na.rm = TRUE)
  )

ggplot(daily_report_worldwide, aes(x = Long, y = Lat, size = Confirmed / 1000)) +
  borders("world", colour = NA, fill = "grey90") +
  theme_bw() +
  geom_point(shape = 21, color = "red", fill = "red", alpha = 0.5) +
  labs(
    title = "World COVID-19 Confirmed cases by Country/Region", x = "", y = "",
    # Legend title: the original text had an unbalanced closing parenthesis.
    size = "Cases (x1000)"
  ) +
  theme(legend.position = "right") +
  coord_fixed(ratio = 1.5)
# Update Anisa Dhana's graph layout of the US to 9/26/2020. You may need to adjust the size of the points.
# US rows of the 09-26-2020 daily report, with the listed states, territories
# and non-geographic entries removed and only positive latitudes kept.
daily_report_09_26_2020 <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv")) %>%
  rename(Long = "Long_") %>%
  filter(Country_Region == "US") %>%
  filter(!Province_State %in% c(
    "Alaska", "Hawaii", "American Samoa",
    "Puerto Rico", "Northern Mariana Islands",
    "Virgin Islands", "Recovered", "Guam", "Grand Princess",
    "District of Columbia", "Diamond Princess"
  )) %>%
  filter(Lat > 0)

# Legend breaks for the colour scale. The original vector repeated 10000, so
# the "50,000+" label was attached to a duplicate break; the last break is now
# 50000 and the fourth label is narrowed to match.
mybreaks <- c(1, 100, 1000, 10000, 50000)

# Points have a fixed size (1.75) and are coloured by confirmed cases on a log
# scale. The original also mapped `size` inside aes() (a constant 3 globally
# and Confirmed in the point layer), but the fixed `size = 1.75` parameter
# overrode both, so the size scale never applied. Those dead mappings and the
# unused size scale are removed.
ggplot(daily_report_09_26_2020, aes(x = Long, y = Lat, color = Confirmed)) +
  borders("state", colour = "white", fill = "grey90") +
  geom_point(stroke = FALSE, alpha = 1, size = 1.75) +
  scale_color_viridis_c(
    option = "viridis", name = "Cases",
    trans = "log", breaks = mybreaks,
    labels = c("1-99", "100-999", "1,000-9,999", "10,000-49,999", "50,000+")
  ) +
  theme_void() +
  # Draw the colour guide as a legend rather than a colourbar.
  guides(colour = guide_legend()) +
  labs(title = "Anisa Dhana's layout for COVID-19 Confirmed Cases in the US") +
  theme(
    legend.position = "bottom",
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  coord_fixed(ratio = 1.5)
# Update the above graph "Number of Confirmed Cases by US County" to 9/26/2020 and use a different color scheme or theme.
# One confirmed total per "county.state" key from the 09-26-2020 daily report,
# lower-cased to match the map data.
report_09_26_2020 <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv")) %>%
  rename(Long = "Long_") %>%
  unite(Key, Admin2, Province_State, sep = ".") %>%
  group_by(Key) %>%
  summarize(Confirmed = sum(Confirmed)) %>%
  mutate(Key = tolower(Key))

# Map data; county polygons get the same "subregion.region" key (original
# columns are kept).
us <- map_data("state")
counties <- map_data("county") %>%
  unite(Key, subregion, region, sep = ".", remove = FALSE)

# Attach the county totals to the county polygons.
state_join <- left_join(counties, report_09_26_2020, by = c("Key"))
# sum(is.na(state_join$Confirmed))

ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  # Add data layer
  geom_polygon(data = state_join, aes(fill = Confirmed)) +
  # State outlines are added after the county fill. Layers are drawn in the
  # order they are added, so drawing the outlines first let the filled county
  # polygons paint over them.
  borders("state", colour = "black") +
  scale_fill_gradientn(
    colors = brewer.pal(n = 5, name = "Blues"),
    breaks = c(1, 10, 100, 1000, 10000, 100000),
    # Counties without a matching report row are shown in white.
    trans = "log10", na.value = "White"
  ) +
  ggtitle("Number of Confirmed Cases by US County") +
  theme_bw()
# Make an interactive plot using a state of your choosing, using a theme different from the ones used in the above examples.
# Confirmed cases per Ohio county from the 09-26-2020 daily report, with
# county names lower-cased to match the map data's `subregion` column.
daily_report_oh <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv")) %>%
  rename(Long = "Long_") %>%
  filter(Province_State == "Ohio") %>%
  group_by(Admin2) %>%
  summarize(Confirmed = sum(Confirmed)) %>%
  mutate(Admin2 = tolower(Admin2))

# State and county polygons restricted to Ohio.
us <- map_data("state")
oh_us <- subset(us, region == "ohio")
counties <- map_data("county")
oh_county <- subset(counties, region == "ohio")

# Attach the county totals to the county polygons.
state_join_oh <- left_join(oh_county, daily_report_oh, by = c("subregion" = "Admin2"))

# plot state map
ggplot(data = oh_county, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  # Add data layer
  geom_polygon(data = state_join_oh, aes(fill = Confirmed), color = "white") +
  scale_fill_gradientn(
    colors = brewer.pal(n = 5, name = "Blues"),
    trans = "log10"
  ) +
  # Title: removed the stray trailing apostrophe present in the original.
  labs(title = "COVID-19 Confirmed Cases in Ohio")
# Interactive version of the Ohio county map: build the ggplot object, then
# hand it to plotly for conversion.
(
  ggplot(oh_county, aes(x = long, y = lat, group = group)) +
    # County polygons filled by confirmed cases, outlined in black.
    geom_polygon(
      data = state_join_oh,
      mapping = aes(fill = Confirmed),
      colour = "black"
    ) +
    coord_fixed(ratio = 1.3) +
    scale_fill_gradientn(colours = brewer.pal(n = 5, name = "Paired")) +
    ggtitle("COVID-19 Cases in Ohio") +
    labs(x = NULL, y = NULL) +
    # Strip the panel border/background and the axis ticks/text in one call.
    theme(
      panel.border = element_blank(),
      panel.background = element_blank(),
      axis.ticks = element_blank(),
      axis.text = element_blank()
    )
) %>%
  ggplotly()